#df_nypd = read_csv("https://www.dropbox.com/scl/fi/kf2zk4t1onxzm2vo3lpkq/NYPD_Complaint_Data_Historic.csv?rlkey=ly36vi9v66sno80eir6rohlwn&dl=1", na = "(null)") |>
# janitor::clean_names()
# Read the historic NYPD complaint extract from the local data directory and
# pass it through janitor::clean_names() to normalise the column names.
df_nypd <- janitor::clean_names(
  read_csv("data/NYPD_Complaint_Data_Historic.csv")
)
Number of Cases in Each Year
# Parse the complaint start date (month/day/year text) into a Date and derive
# its calendar year. Column names are not cleaned again here because `df_nypd`
# already goes through janitor::clean_names() when it is loaded.
df_nypd_test <- df_nypd |>
  mutate(
    cmplnt_fr_dt = lubridate::mdy(cmplnt_fr_dt),
    year = lubridate::year(cmplnt_fr_dt)
  )

# One row per year holding the number of complaints recorded for that year.
year_counts <- df_nypd_test |>
  count(year, name = "count")

# Interactive marker-and-line chart of complaints per year. The marker palette
# is sized from the data so each year receives its own colour, instead of
# assuming that exactly six years are present.
plot_ly(
  data = year_counts,
  x = ~year,
  y = ~count,
  type = "scatter",
  mode = "markers+lines",
  marker = list(color = viridis(nrow(year_counts)))
) |>
  layout(
    title = "Number of Cases in Each Year",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Number of Cases"),
    showlegend = FALSE
  )
Number of Cases by Year, Borough, and Law Category
# Complaint counts for every borough / year / law-category combination.
# count() returns an ungrouped tibble, so no leftover grouping (and no
# regrouping message) carries over into later steps.
count_data <- df_nypd_test |>
  count(boro_nm, year, law_cat_cd, name = "count") |>
  # "(null)" is the text this file uses for a missing borough; omit those rows.
  filter(boro_nm != "(null)")

# Static ggplot: one panel per borough, one coloured line per law category.
# Points and lines share the same dodge width so they stay aligned.
figure <- ggplot(count_data, aes(x = year, y = count, color = law_cat_cd)) +
  geom_point(position = position_dodge(width = 0.8)) +
  geom_line(aes(group = law_cat_cd), position = position_dodge(width = 0.8)) +
  facet_wrap(~boro_nm, ncol = 5) +
  labs(
    title = "Number of Cases by Year, Borough, and Law Category",
    x = "Year",
    y = "Number of Cases",
    color = "Law Category"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    strip.background = element_rect(fill = "lightblue", color = "lightblue"),
    plot.margin = margin(20, 10, 20, 20, unit = "pt"),
    strip.text = element_text(size = 8),
    # Tilt the year labels so they do not overlap inside the narrow panels.
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title.x = element_text(margin = margin(t = 15)),
    legend.text = element_text(size = 8)
  )

# Convert the static figure into an interactive plotly widget.
ggplotly(figure)
Top 10 Types of Crime by Borough and Year